from struct import *
import sys

# Script overview:
#   1. Read the results file line by line.
#   2. Keep only lines that split (on single spaces) into exactly
#      EXPECTED_FIELD_COUNT fields and whose last field starts with
#      DOC_ID_PREFIX; count how many times each such field value occurs.
#   3. Write the distinct values, sorted ascending, one per line, as
#      "<index> <value> 2".
# NOTE(review): the last field is presumably a TREC document id (judging
# only by the output file name) -- confirm against the producer of the
# input file.

print("Begin:")

# Layout of a line that is taken into account. Lines with any other number
# of fields are silently ignored.
EXPECTED_FIELD_COUNT = 34
DOC_ID_FIELD_INDEX = 33
DOC_ID_PREFIX = "GX"

inputFileName = "/data5/team/obukai/the_new_trip_of_feature_generation/gov2ClearYourMindAndDoItAgain/resultsNeededToAnalyze.txt"
outputFileName = "/data5/team/obukai/the_new_trip_of_feature_generation/gov2ClearYourMindAndDoItAgain/priorityTrecDocumentsIDs_all_sorted_with_index_number_BM25_oriented.txt"

# Maps each accepted last-field value to its number of occurrences.
relevenceDocDict = {}

# Both files are opened before any processing, so a bad path fails
# immediately; the `with` blocks guarantee both handles are closed even
# if an exception is raised part-way through.
with open(inputFileName, "r") as inputFileHandler:
    with open(outputFileName, "w") as outputFileHandler:
        # Iterate the file lazily instead of loading every line up front.
        for line in inputFileHandler:
            lineElements = line.strip().split(" ")
            if len(lineElements) != EXPECTED_FIELD_COUNT:
                continue
            docId = lineElements[DOC_ID_FIELD_INDEX]
            if not docId.startswith(DOC_ID_PREFIX):
                continue
            relevenceDocDict[docId] = relevenceDocDict.get(docId, 0) + 1

        # Every key was inserted only after passing the prefix check above,
        # so no separate "unexpected document" validation pass is required.
        print("len(relevenceDocDict): %d" % len(relevenceDocDict))

        # Distinct values in ascending order.
        relevenceDocList = sorted(relevenceDocDict)

        # The trailing "2" is a constant third column.
        # NOTE(review): its meaning is not visible in this file -- confirm
        # with the consumer of the output.
        for index, doc in enumerate(relevenceDocList):
            outputFileHandler.write("%d %s 2\n" % (index, doc))


print("End...")

